Estimated vs. Reported Populations of Community Water Systems
Load data
Show the code
# Load reported detailssystem.deets <-vroom(here("Input_Data/SDWIS/Water_System_Detail_2023Q4.csv"), show_col_types =FALSE)%>%setNames(str_replace_all(colnames(.)," ","_"))%>%select(PWS_Name,Primacy_Agency,PWS_ID,Population_Served_Count,Service_Connections_Count,Pop_Cat_5)# Load Census population countscensus <-vroom("D:/data/nhgis/tables/Blocks/nhgis0307_ds248_2020_block.csv")%>%select(GISJOIN,U7B001,U7G002)%>%setNames(c("GISJOIN","Population_C","Housing_O"))methods <-vroom(here("Output_Data/Final_052024.csv"))%>%select(PWSID_12,Method)# Load primary service area typessa.types <-vroom("D:/Github/ORD_SAB_Model/Input_Data/SDWIS/Primary_Service_Area_Type.csv")# Load CWS : Block crosswalk and join then filtercw <-vroom(here("Output_Data/Final_Blocks/CWS_Blocks_2023Q4.csv"))%>%mutate(Pct_Block =round(100*(I_Area_km/Block_Area_km),2))%>%left_join(census)%>%left_join(sa.types, by =c("PWSID_12"="PWSID"))%>%mutate(drop =ifelse(Pct_Block >=10| Service_Area_Type =="Mobile Home Park", FALSE,TRUE))%>%filter(drop ==FALSE)%>%select(GISJOIN,PWSID_12,Population_C,Housing_O)# Aggregate to the system and join reported detailscws <- cw%>%group_by(PWSID_12)%>%summarise(Population_C =sum(Population_C,na.rm=TRUE),Housing_O =sum(Housing_O,na.rm=TRUE))%>%left_join(system.deets, by =c("PWSID_12"="PWS_ID"))%>%left_join(sa.types, by =c("PWSID_12"="PWSID"))%>%mutate(PWS_Name =iconv(PWS_Name, "UTF-8", "UTF-8",sub=''))%>%left_join(methods)colnames(cws)[1] <-"PWSID"vroom_write(cws, here("Analysis/Population_Served/CWS_Populations.csv"), append =FALSE)
SDWIS Population Served vs. Service Connections
Even within SDWIS there is not a uniform relationship between Population Served and Service Connections
Show the code
cws <-vroom(here("Analysis/Population_Served/CWS_Populations.csv"))%>%drop_na(Method)lm <-lm(Population_Served_Count ~ Service_Connections_Count, data = cws)summary(lm)
Call:
lm(formula = Population_Served_Count ~ Service_Connections_Count,
data = cws)
Residuals:
Min 1Q Median 3Q Max
-2395985 -11 424 518 5869699
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -540.55959 184.53544 -2.929 0.0034 **
Service_Connections_Count 3.30351 0.01136 290.688 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 37920 on 43153 degrees of freedom
(152 observations deleted due to missingness)
Multiple R-squared: 0.662, Adjusted R-squared: 0.6619
F-statistic: 8.45e+04 on 1 and 43153 DF, p-value: < 2.2e-16
Formula:
\[POP_{served}=Connections*3.304-540.56\]
R2 = 0.66
Show the code
new.df <-data.frame(Service_Connections_Count =c(0,3000000))new.df$Population_Served_Count <-predict(lm, newdata = new.df)plot_ly()%>%add_lines(data = new.df, x =~Service_Connections_Count, y =~Population_Served_Count,name ="Regression", line =list(color ='#00673E', width =4, dash ='dot'))%>%add_markers(data = cws, x =~Service_Connections_Count, y =~Population_Served_Count, color =~Method,text =~paste(PWS_Name,"<br>","PWS ID: ",PWSID,"<br>","Census Population: ", format(Population_C, big.mark=","),"<br>","SDWIS Population: ",format(Population_Served_Count, big.mark =","),"<br>","SDWIS Connections: ",format(Service_Connections_Count, big.mark=","),"<br>","Housing Units: ",format(Housing_O, big.mark =","),"<br>","Method:", Method),hoverinfo ='text')%>%layout(title ="",xaxis =list(title ="Service Connections Count"),yaxis =list (title ="Population Served Count"))
Census Population vs. Reported Population
Show the code
plot_ly(cws)%>%add_lines(x =c(0,9000000), y =c(0,9000000),name ="1:1", line =list(color ='#00673E', width =4, dash ='dot'))%>%add_markers(x =~Population_C, y =~Population_Served_Count, color =~Method,text =~paste(PWS_Name,"<br>","PWS ID: ",PWSID,"<br>","Census Population: ", format(Population_C, big.mark=","),"<br>","SDWIS Population: ",format(Population_Served_Count, big.mark =","),"<br>","SDWIS Connections: ",format(Service_Connections_Count, big.mark=","),"<br>","Housing Units: ",format(Housing_O, big.mark =","),"<br>","Method:", Method),hoverinfo ='text')%>%layout(title ="",xaxis =list(title ="Census Calculated Population (2020)"),yaxis =list (title ="SDWIS Reported Population"))
Census Occupied Housing Units vs. Reported Service Connections
Show the code
plot_ly(cws)%>%add_lines(x =c(0,3500000), y =c(0,3500000),name ="1:1", line =list(color ='#00673E', width =4, dash ='dot'))%>%add_markers(x =~Housing_O, y =~Service_Connections_Count, color =~Method,text =~paste(PWS_Name,"<br>","PWS ID: ",PWSID,"<br>","Census Population: ", format(Population_C, big.mark=","),"<br>","SDWIS Population: ",format(Population_Served_Count, big.mark =","),"<br>","SDWIS Connections: ",format(Service_Connections_Count, big.mark=","),"<br>","Housing Units: ",format(Housing_O, big.mark =","),"<br>","Method:", Method),hoverinfo ='text')%>%layout(title ="",xaxis =list(title ="Census Calculated Occupied Housing Units (2020)"),yaxis =list (title ="SDWIS Reported Service Connections"))